import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
# Read every CSV used by the analysis. The paths are listed in the same order
# as the names they are unpacked into on the left-hand side.
matches, home_away, most_runs, deliveries, teams, players = map(
    pd.read_csv,
    (
        "Data/matches.csv",
        "Data/teamwise_home_and_away.csv",
        "Data/most_runs_average_strikerate.csv",
        "Data/deliveries.csv",
        "Data/teams.csv",
        "Data/Players.csv",
    ),
)
# Short display labels for every franchise spelling handled by this analysis.
# Several full names deliberately collapse onto one label: "Deccan Chargers"
# is folded into "SRH", all Pune spellings share "Pune", and both Delhi names
# share "Delhi". Defined once so the three frames below cannot drift apart.
TEAM_SHORT_NAMES = {
    'Sunrisers Hyderabad': "SRH",
    'Mumbai Indians': "MI",
    'Gujarat Lions': "GL",
    'Rising Pune Supergiants': "Pune",
    'Royal Challengers Bangalore': "RCB",
    'Kolkata Knight Riders': "KKR",
    'Delhi Daredevils': "Delhi",
    'Kings XI Punjab': "Punjab",
    'Chennai Super Kings': "CSK",
    'Rajasthan Royals': "RR",
    'Deccan Chargers': "SRH",
    'Kochi Tuskers Kerala': "KTK",
    'Pune Warriors': "Pune",
    'Delhi Capitals': "Delhi",
    'Rising Pune Supergiant': "Pune",
}

# The bare names below are notebook-style previews; they have no effect when
# the file is run as a plain script.
teams
# DataFrame.replace with a dict swaps matching cell values in every column.
teams = teams.replace(TEAM_SHORT_NAMES)
teams
home_away = home_away.replace(TEAM_SHORT_NAMES)
home_away
matches = matches.replace(TEAM_SHORT_NAMES)
matches
most_runs
deliveries
players
# Bar chart of total matches won per team.
win_counts = matches['winner'].value_counts()
# Name the counts column explicitly: pandas >= 2.0 calls the value_counts()
# result "count" (and names the index "winner"), so relying on the implicit
# column name breaks there. Clearing the index name avoids having an index and
# a column that are both called "winner".
most_wins_df = win_counts.rename_axis(None).rename("winner").to_frame()
plt.figure(figsize=(12, 8))
sns.barplot(x=most_wins_df.index, y=most_wins_df["winner"])
plt.ylabel("Number of Matches Won", fontsize=14)
plt.xticks(fontsize=14)
plt.title("NUMBER OF MATCHES WON BY A TEAM", {"fontsize": 16})
# Horizontal bar chart of how many tosses each team has won.
toss_counts = matches['toss_winner'].value_counts()
# Build the two-column frame explicitly instead of relying on the column names
# reset_index() happens to produce, which differ between pandas 1.x and 2.x.
# Column "index" holds the team label, column "toss_winner" holds the count.
toss_winner = pd.DataFrame({
    'index': toss_counts.index,
    'toss_winner': toss_counts.to_numpy(),
})
toss_winner
# Sets the default figure size for figures created from here on.
sns.set(rc={'figure.figsize': (15, 8)})
# Open a fresh figure so the axes are not added to a figure that is still
# current from earlier plotting.
plt.figure()
ax = plt.axes()
ax.set(facecolor='lightblue')
plt.title(' Number Of Tosses Win By Team', fontsize=20)
sns.barplot(y=toss_winner['index'], x=toss_winner['toss_winner'], orient='h', palette='cubehelix')
plt.xlabel('Total Toss Wins')
plt.ylabel('Teams')
plt.show()
# Grouped count plot: one cluster per season, one bar per toss decision.
plt.figure(figsize=(15, 10))
ax = plt.axes()
ax.grid(False)
sns.countplot(
    x='Season',
    hue='toss_decision',
    data=matches,
    linewidth=5,
)
# Season labels are rotated so they do not overlap.
plt.xticks(rotation=90)
ax.set_title('Toss decisions by season', fontsize=20)
ax.set_xlabel('Seasons')
ax.set_ylabel('Count')
plt.show()
# Count plot of the ten venues that appear most often in `matches`, with the
# match count written above every bar.
plt.figure(figsize=(12, 10))
ax = plt.axes()
ax.grid(False)
busiest_venues = matches['venue'].value_counts().index[0:10]
ax = sns.countplot(
    data=matches,
    x='venue',
    order=busiest_venues,
    palette='gnuplot',
    linewidth=5,
)
for bar in ax.patches:
    bar_height = bar.get_height()
    # Centre the label horizontally on the bar and sit it on the bar's top.
    label_x = bar.get_x() + bar.get_width() / 2.
    ax.text(
        label_x,
        bar_height,
        '%d' % int(bar_height),
        fontsize=15,
        color='black',
        ha='center',
        va='bottom',
    )
plt.xlabel('Venues', fontsize=15)
plt.ylabel('Total Matches', fontsize=15)
plt.xticks(rotation=90, fontsize=12)
plt.yticks(fontsize=12)
plt.title('Top 10 host venues', fontsize=20)
plt.show()
# Ten players with the most player-of-the-match awards.
mom_counts = matches["player_of_match"].value_counts()
# Name both columns explicitly. Renaming the columns produced by reset_index()
# ("index" / "player_of_match") only works on pandas 1.x; pandas >= 2.0 emits
# "player_of_match" / "count" instead, which would leave no "player" column.
most_mom_df = pd.DataFrame({
    "player": mom_counts.index,
    "num of time won": mom_counts.to_numpy(),
})
# value_counts() sorts descending, so the first ten rows are the top ten.
most_mom_df = most_mom_df[:10]
most_mom_df
plt.figure(figsize=(12, 6))
sns.barplot(x=most_mom_df["player"], y=most_mom_df["num of time won"])
plt.ylabel("Number of times won", fontsize=16)
plt.xticks(fontsize=16, rotation=90)
plt.title("MOST MAN OF THE MATCH AWARDS", {"fontsize": 16})
# For every season, pick the team with the most wins and express those wins as
# a percentage of the matches that team played in the season. Note that the
# team is selected by win COUNT; the percentage is only computed for it.
season_column = np.sort(matches.Season.unique())
highest_win_percentage_by_year = []
team_with_highest_win_percentage = []
for season in season_column:
    df_by_year = matches[matches["Season"] == season]
    # value_counts() sorts descending, so position 0 is the season's top winner.
    wins_by_team = df_by_year.winner.value_counts()
    team_most_wins = wins_by_team.index[0]
    # .iloc makes the positional lookup explicit; plain [0] on a label-indexed
    # Series is deprecated in recent pandas and becomes a label lookup.
    number_of_wins = wins_by_team.iloc[0]
    played_mask = (df_by_year.team1 == team_most_wins) | (df_by_year.team2 == team_most_wins)
    matches_played = len(df_by_year[played_mask])
    percentage = np.round(number_of_wins * 100 / matches_played, 2)
    highest_win_percentage_by_year.append(percentage)
    team_with_highest_win_percentage.append(team_most_wins)
highest_win_percentage_by_year
team_with_highest_win_percentage
most_successful_team_by_year = pd.DataFrame({
    "Season": season_column,
    "team": team_with_highest_win_percentage,
    "win_percentage": highest_win_percentage_by_year,
})
most_successful_team_by_year
# Prints the bar position and value that the annotations below are based on.
for index, data in enumerate(most_successful_team_by_year.win_percentage):
    print(index, data)
plt.figure(figsize=(18, 6))
sns.barplot(data=most_successful_team_by_year, x="Season", y="win_percentage")
plt.xlabel("Year", fontsize=16)
plt.ylabel("Win Percentage", fontsize=16)
# Percentage label just above each bar.
for index, data in enumerate(most_successful_team_by_year.win_percentage):
    plt.text(x=index - 0.25, y=data + 1, s=f"{data}%", fontsize=14)
# Team label near the base of each bar.
for index, data in enumerate(most_successful_team_by_year.team):
    plt.text(x=index - 0.35, y=0.6, s=f"{data}", fontsize=16)
plt.xticks(fontsize=12)
plt.title("MOST SUCCESSFUL TEAM BY YEAR", fontsize=18, pad=30)
# Count how many seasons each team won, taking the last row of every season as
# that season's deciding match.
# NOTE(review): assumes rows within a season are in chronological order so the
# last row is the final — confirm against the data file.
final = matches.groupby('Season').tail(1)
title_counts = final['winner'].value_counts()
title_counts
plt.figure(figsize=(10, 8))
ax = plt.axes()
ax.set(facecolor='grey')
ax.grid(False)
sns.countplot(x=final['winner'], order=title_counts.index, linewidth=5, palette='gist_ncar')
plt.title("IPL Champions", fontsize=20)
plt.xlabel('Teams', fontsize=20)
plt.ylabel('No.of Trophy', fontsize=20)
# Rotation must be a number (or 'horizontal' / 'vertical'); the string '0' is
# rejected by current matplotlib.
plt.xticks(rotation=0)
plt.show()
# Pie chart: share of matches won by the side that also won the toss.
# Rows without a recorded winner compare unequal and therefore fall into the
# second slice.
toss_factor = matches.toss_winner == matches.winner
toss_factor.value_counts()
# Derive both counts from the data rather than hard-coding them, so the chart
# stays correct when the match file changes.
toss_data = pd.Series({
    "Matches_won_by_toss_winner": int(toss_factor.sum()),
    "Matches_won_by_toss_looser": int((~toss_factor).sum()),
})
toss_data.index
plt.figure(figsize=(7, 7))
plt.pie(x=toss_data, autopct="%.2f%%", explode=[0.03] * 2, labels=toss_data.index)
# Five venues with the most matches whose `result` is anything but 'wickets'.
# NOTE(review): this assumes the `result` column stores 'runs' / 'wickets'; if
# it holds other labels the filter keeps every row — confirm against the data.
bat_first_counts = matches.venue[matches['result'] != 'wickets'].value_counts()[0:5]
# Build the frame explicitly: the column names produced by reset_index() on a
# value_counts() result differ between pandas 1.x and 2.x. Column "index" holds
# the venue name and column "venue" holds the count, as the plot below expects.
bat_first = pd.DataFrame({
    'index': bat_first_counts.index,
    'venue': bat_first_counts.to_numpy(),
})
bat_first
plt.figure(figsize=(10, 8))
ax = plt.axes()
ax.grid(False)
ax.set(facecolor='grey')
ax = sns.barplot(data=bat_first, x='venue', y='index', linewidth=5)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title('Top 5 venues to win games batting first')
plt.xlabel('Total Wins')
plt.ylabel('Venues')
plt.show()
# Five venues with the most matches whose `result` is anything but 'runs'.
# NOTE(review): this assumes the `result` column stores 'runs' / 'wickets'; if
# it holds other labels the filter keeps every row — confirm against the data.
bat_second_counts = matches.venue[matches['result'] != 'runs'].value_counts()[0:5]
# Build the frame explicitly: the column names produced by reset_index() on a
# value_counts() result differ between pandas 1.x and 2.x. Column "index" holds
# the venue name and column "venue" holds the count, as the plot below expects.
bat_second = pd.DataFrame({
    'index': bat_second_counts.index,
    'venue': bat_second_counts.to_numpy(),
})
plt.figure(figsize=(10, 8))
ax = plt.axes()
ax.grid(False)
ax.set(facecolor='lightblue')
ax = sns.barplot(data=bat_second, x='venue', y='index', palette='seismic', linewidth=5)
plt.title('Top 5 venues to win games batting second')
plt.xlabel('Total Wins')
plt.ylabel('Venues')
plt.show()
most_runs
# First ten rows of the run-scorers table.
# NOTE(review): presumably the CSV is already sorted by total runs, making
# these the top ten scorers — confirm against the file.
most_runs_df = most_runs[:10]
plt.figure(figsize=(12, 6))
sns.barplot(x=most_runs_df.batsman, y=most_runs_df["strikerate"])
# The bars show strike rate, so label the axis accordingly.
plt.ylabel("Strike rate", fontsize=16)
plt.xticks(fontsize=16, rotation=30)
plt.title("STRIKE RATE", {"fontsize": 16})
# Bar chart of total runs for the batsmen held in `most_runs_df`.
plt.figure(figsize=(12, 6))
sns.barplot(
    x=most_runs_df["batsman"],
    y=most_runs_df["total_runs"],
)
plt.title("TOP 10 BATSMEN SCORE", {"fontsize": 16})
plt.ylabel("Total score", fontsize=16)
plt.xticks(rotation=30, fontsize=16)
# Ten batsmen with the most deliveries scored as exactly four runs.
four_counts = deliveries.batsman[deliveries['batsman_runs'] == 4].value_counts()[0:10]
# Build the frame explicitly: the column names produced by reset_index() on a
# value_counts() result differ between pandas 1.x and 2.x. Column "index" holds
# the batsman name and column "batsman" holds the count, as the plot expects.
fours = pd.DataFrame({
    'index': four_counts.index,
    'batsman': four_counts.to_numpy(),
})
plt.figure(figsize=(15, 8))
ax = plt.axes()
ax.grid(False)
ax.set(facecolor='grey')
ax = sns.barplot(data=fours, x='index', y='batsman', palette='Oranges', linewidth=5)
plt.ylabel('Total no. of fours')
plt.xlabel('Batsman')
plt.xticks(fontsize=12, rotation=0)
plt.yticks(fontsize=12)
# Ten batsmen are plotted, so the title says ten.
plt.title('Top 10 batsman with most no. of fours')
plt.show()
# Ten batsmen with the most deliveries scored as exactly six runs.
six_counts = deliveries.batsman[deliveries['batsman_runs'] == 6].value_counts()[0:10]
# Build the frame explicitly: the column names produced by reset_index() on a
# value_counts() result differ between pandas 1.x and 2.x. Column "index" holds
# the batsman name and column "batsman" holds the count, as the plot expects.
six = pd.DataFrame({
    'index': six_counts.index,
    'batsman': six_counts.to_numpy(),
})
plt.figure(figsize=(15, 8))
ax = plt.axes()
ax.grid(False)
ax.set(facecolor='grey')
ax = sns.barplot(data=six, x='index', y='batsman', palette='Oranges', linewidth=5)
# This chart counts sixes; the axis label and title now say so, and the title
# matches the ten batsmen actually plotted.
plt.ylabel('Total no. of sixes')
plt.xlabel('Batsman')
plt.xticks(fontsize=12, rotation=90)
plt.yticks(fontsize=12)
plt.title('Top 10 batsman with most no. of sixes')
plt.show()
# Two per-team bar charts with identical styling: home win percentage first,
# then away win percentage.
for pct_column, chart_title in (
    ("home_win_percentage", "HOME WIN PERCENTAGE"),
    ("away_win_percentage", "AWAY WIN PERCENTAGE"),
):
    plt.figure(figsize=(12, 6))
    sns.barplot(x=home_away.team, y=home_away[pct_column])
    plt.ylabel("Percentage ", fontsize=16)
    plt.xticks(fontsize=16, rotation=0)
    plt.title(chart_title, {"fontsize": 16})
# Overlaid bars per team: home matches played (red) with home wins (blue)
# drawn on top at the same x positions.
plt.figure(figsize=(20, 10))
for count_column, legend_label, bar_colour in (
    ('home_matches', "MATCHES", 'r'),
    ('home_wins', "WON", 'b'),
):
    plt.bar(home_away['team'], home_away[count_column], label=legend_label, color=bar_colour, width=.5)
plt.legend()
plt.ylabel("Count ", fontsize=16)
plt.title("Home Matches and wins", {"fontsize": 16})
plt.xticks(fontsize=16, rotation=0)
plt.show()
# Normalise inconsistent spellings in the players table so equivalent labels
# are counted together. "fast-medium" is folded into "medium-fast" while
# keeping the bowling arm unchanged (the right-arm variant previously ended up
# under the left-arm label).
players = players.replace({
    'Right-arm Medium': "Right-arm medium",
    'Left-arm fast-medium': "Left-arm medium-fast",
    'Right-arm fast-medium': "Right-arm medium-fast",
    "Right_hand": 'Right_Hand',
})
# Player counts per category for two columns of `players`, drawn with the same
# styling; only the tick rotation differs between the two charts.
for category_column, chart_title, tick_rotation in (
    ("Bowling_Skill", "BOWLING SKILL", 90),
    ("Batting_Hand", "BATTING HAND", 0),
):
    plt.figure(figsize=(12, 6))
    sns.countplot(x=category_column, data=players)
    plt.ylabel("No. of Player ", fontsize=16)
    plt.xticks(fontsize=16, rotation=tick_rotation)
    plt.title(chart_title, {"fontsize": 16})
# Bar chart of the number of players per country.
# Open a dedicated figure and pass its axes explicitly: without an `ax`,
# Series.plot may draw onto axes that are still current from earlier plotting.
plt.figure()
players['Country'].value_counts().plot.bar(ax=plt.gca(), width=0.9, color='blue', alpha=0.75)
plt.xlabel('Countries', fontsize=16)
plt.title("Countries vs Number of Players", {"fontsize": 16})
plt.ylabel("Number of Players ", fontsize=16)
plt.xticks(fontsize=16, rotation=30)
plt.show()
# Attach each delivery's season by left-joining the match id/season pairs onto
# the deliveries (matches.id <-> deliveries.match_id). The join key from the
# left side is dropped afterwards; `match_id` stays.
season_by_match = matches[['id', 'Season']]
df_match_deliver = pd.merge(
    season_by_match,
    deliveries,
    how='left',
    left_on='id',
    right_on='match_id',
).drop(columns='id')
df_match_deliver
# Bar chart of the number of sixes hit in each season, with the count printed
# at the top of every bar.
is_six = df_match_deliver['batsman_runs'] == 6
df = df_match_deliver[is_six]
sixes_by_season = df.groupby('Season')['batsman_runs'].count().reset_index()
plt.figure(figsize=(12, 10))
ax = plt.axes()
ax.grid(False)
ax = sns.barplot(data=sixes_by_season, x='Season', y='batsman_runs', linewidth=3)
# After reset_index() the row labels are 0..n-1, used here as the x position
# of each annotation.
for bar_position, six_count in zip(sixes_by_season.index, sixes_by_season['batsman_runs']):
    ax.text(bar_position, six_count, six_count, color='purple', ha="center", size=20)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title('Total sixes by season')
plt.xlabel('Season')
plt.ylabel('Total Sixes')
plt.show()
# Purple Cap: the bowler with the most wickets in each season.
df = df_match_deliver

# Dismissal kinds that are not credited to the bowler.
# NOTE(review): labels assumed from the usual deliveries.csv vocabulary —
# confirm against deliveries['dismissal_kind'].unique(). Labels that do not
# occur in the data simply have no effect.
NON_BOWLER_DISMISSALS = ['run out', 'retired hurt', 'obstructing the field']

dismissal_kind = df['dismissal_kind']
# A delivery counts as a wicket when a dismissal is recorded (not missing and
# not the blank placeholder ' ') and it is credited to the bowler. Vectorised
# instead of a per-row membership test.
is_bowler_wicket = (
    dismissal_kind.notna()
    & (dismissal_kind != ' ')
    & ~dismissal_kind.isin(NON_BOWLER_DISMISSALS)
)
# Wickets per (season, bowler).
data = df[is_bowler_wicket].groupby(['Season', 'bowler'])['ball'].count()

# Keep the top wicket-taker of EVERY season: order each season's bowlers by
# wickets (descending) and keep the first row per season. Taking a global
# top-N first and then the first bowler per season in index order would pick
# the alphabetically first bowler and drop low-scoring seasons entirely.
wicket = (
    data.reset_index(name='Wicket')
    .sort_values(['Season', 'Wicket'], ascending=[True, False])
    .drop_duplicates(subset='Season', keep='first')
    .rename(columns={'Season': 'Year', 'bowler': 'Player'})
    .reset_index(drop=True)
)
wicket
plt.figure(figsize=(12, 6))
sns.barplot(x=wicket.Player, y=wicket["Wicket"])
plt.ylabel("Wicket ", fontsize=16)
plt.xticks(fontsize=13, rotation=0)
plt.title("Purple Cap Winner", {"fontsize": 16})
# Orange Cap: the batsman with the most runs in each season.
data = (
    df_match_deliver.groupby(['Season', 'batsman'])['batsman_runs']
    .sum()
    .reset_index()
    # Highest totals first, so the first row kept per season is its top scorer.
    .sort_values('batsman_runs', ascending=False)
    .drop_duplicates(subset=["Season"], keep="first")
)
fig = px.bar(data, x='batsman', y='batsman_runs', text='Season', color='batsman')
fig.update_layout(
    height=500,
    title_text='Orange Cap Winners',
    # The x axis shows batsmen; the season is printed as text on each bar.
    xaxis=dict(title='Batsman'),
    yaxis=dict(title='Runs'),
)
fig.show()